Report - 2020-05-09

In [1]:
%matplotlib inline
import pandas as pd
import geopandas as gpd
import numpy as np
import datetime
import matplotlib.pyplot as plt
import matplotlib
import json
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import os
from scipy.optimize import curve_fit
from scipy.interpolate import interp1d
font = {'family' : 'Linux Biolinum',
        'weight' : 'bold',
        'size'   : 22}

matplotlib.rc('font', **font)
path_to_geojson = "geo-data-hungary/GeoJSON/"

Statistical data used for the analysis

In [2]:
population = 9769012
ventilators = 1e-5 * population * 22.8 * 0.5
ratio_of_ventilator_use = 0.10

Description of the dataset

  • Origin of data: https://koronavirus.gov.hu
  • Structure:
    • Date: the date when the given data has been published
    • Active: registered active cases at the time of the publication of the data
    • Deaths: death cases accounted to COVID-19
    • Recovered: number of cases when an active case recovered
    • New: new cases compared to previous data publishing
    • DailyRate: the number of new cases on a given day divided by the previous day's
In [3]:
df = pd.read_csv("dataset/koronavirus.gov.hu.csv")
df["New"] = (df["Active"] + df["Deaths"] + df["Recovered"]).diff()
df["DailyRate"] = df["New"]
for i in range(1, df.shape[0]):
    if df["New"].iloc[i-1] == 0:
        df.at[i, "DailyRate"] =  np.NaN
    else:
        df.at[i, "DailyRate"] = df["DailyRate"].iloc[i] / df["New"].iloc[i-1]
df
Out[3]:
Date Active Deaths Recovered New DailyRate
0 2020-03-04 2 0 0 NaN NaN
1 2020-03-05 4 0 0 2.0 NaN
2 2020-03-06 4 0 0 0.0 0.000000
3 2020-03-07 5 0 0 1.0 NaN
4 2020-03-08 7 0 0 2.0 2.000000
... ... ... ... ... ... ...
62 2020-05-05 1993 363 709 29.0 0.763158
63 2020-05-06 1979 373 759 46.0 1.586207
64 2020-05-07 1966 383 801 39.0 0.847826
65 2020-05-08 1921 392 865 28.0 0.717949
66 2020-05-09 1904 405 904 35.0 1.250000

67 rows × 6 columns

Used model functions

Exponential function

  • Formula: $f(x)=A\cdot\beta^x$
  • Interpretation: Let's say that we have initially $A$ people who by average create $A\cdot\beta$ new cases per day. If this rate is not changing the data follows a strict exponential. In reality there is a change in $\beta$ because of the different interventions hence the following would be more appropiate:
    • $f(x)=A\cdot\beta^x(x)$
  • Problems of the model:
    • An approximation is needed for $\beta(x)$
    • It is not true that all the infections originate from the initial $A$ since movement between borders were possible and still is for certain cases
  • Advantages:
    • $\beta$ can hide severeal unknown parameters so we can call $A$ and $\beta(x)$ as effective parameters
In [4]:
def exponentialFunction(x, beta, A):
    return A*np.power(beta, x)

Statistics

Active cases

In [5]:
def activeCasesPlot(ylog=False):
    fig = go.Figure()

    fig.add_trace(go.Scatter(x=df["Date"],
                             y=df["Active"],
                             mode='lines+markers',
                             name='Active cases',
                             line_shape="spline",
                             line=dict(width=2,
                                      color="dodgerblue"),
                             marker=dict(size=10),)
                 )

    fig.update_layout(
        title="Active cases in Hungary, total active cases today: %d" % df["Active"].iloc[-1],
        xaxis_title="Date of observation",
        yaxis_title="Number of active cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [6]:
activeCasesPlot()
In [7]:
def dailyNewCasesPlot(ylog=False):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df["Date"],
                             y=df["New"],
                             mode='lines+markers',
                             name='Daily new cases',
                             line_shape="spline",
                             line=dict(width=2,
                                      color="dodgerblue"),
                             marker=dict(size=10),)
                 )

    fig.update_layout(
        title="Daily new cases in Hungary, today: %d" % df["New"].iloc[-1],
        xaxis_title="Date of observation",
        yaxis_title="Number of daily new cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [8]:
dailyNewCasesPlot()
In [ ]:
 
In [9]:
def dailyNewCasesRatioPlot(ylog=False):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df["Date"],
                             y=df["DailyRate"],
                             mode='lines+markers',
                             name='Ratio of daily new cases compared to previous day',
                             line_shape="spline",
                             line=dict(width=2,
                                      color="dodgerblue"),
                             marker=dict(size=10),)
                 )

    fig.update_layout(
        title="Ratio of daily new cases compared to previous day in Hungary, average: %.2f" % df["DailyRate"].mean(),
        xaxis_title="Date of observation",
        yaxis_title="Ratio of daily new cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [10]:
dailyNewCasesRatioPlot()

Deaths

In [34]:
dividers = [16,35,46]
popts = []
pcovs = []
for i in range(len(dividers)):
    if i == len(dividers)-1:
        popt, pcov = curve_fit(exponentialFunction, df.index[dividers[i]:], df.Deaths.iloc[dividers[i]:])
    else:
        popt, pcov = curve_fit(exponentialFunction, df.index[dividers[i]:dividers[i+1]], df.Deaths.iloc[dividers[i]:dividers[i+1]])
    popts.append(popt)
    pcovs.append(pcov)
In [35]:
def deathCasesPlot(ylog=False):
    day_limit=80
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Deaths"].diff(),
        name="Daily death cases",
        marker=dict(
            color="tomato",
        )
    ))
    
    fig.add_trace(go.Scatter(x=df["Date"],
                             y=df["Deaths"],
                             mode='lines+markers',
                             name='Death cases',
                             line_shape="spline",
                             line=dict(width=2,
                                       color='firebrick',
                                      ),
                             marker=dict(size=10),)
                 )
    
    betas = []
    for j,i in enumerate(popts):
        betas.append("%.2f" % i[0])

    fig.add_trace(go.Scatter(
        x=df["Date"].iloc[dividers],
        y=df["Deaths"].iloc[dividers],
        text=betas,
        textposition="top center",
        mode="markers+text",
        name=r"New $\beta$ value",
        marker=dict(
            size=15,
            color="rgba(0, 0, 0, 0.4)"
        )
    ))
    
    for i in range(len(dividers)):
        line = dict(
            width=1,
            dash="dash"
        )
        
        if (i == len(dividers)-1):
            line = dict(
                width=2,
            )
        
        base = df["Date"].iloc[dividers[i]]
        date_list = [datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=x) for x in range(day_limit-dividers[i])]
        fig.add_trace(go.Scatter(x=date_list,
                                 y=exponentialFunction(range(dividers[i],dividers[i]+len(date_list)), *popts[i]),
                                 mode='lines',
                                 name=r'Fitted exponential $\beta=%.2f$' % popts[i][0],
                                 line_shape="spline",
                                 line=line,
                                 marker=dict(size=10),)
                     )



    fig.update_layout(
        title="Death cases in Hungary, total: %d" % df["Deaths"].iloc[-1],
        xaxis_title="Date of observation",
        yaxis_title="Number of death cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [36]:
deathCasesPlot(True)

Recovery

In [14]:
popt, pcov = curve_fit(exponentialFunction, df.index, df.Recovered)
In [15]:
def recoveredCasesPlot(ylog=False):
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Recovered"].diff(),
        name="Daily recovered cases",
        marker=dict(
            color="chartreuse",
        )
    ))
    
    fig.add_trace(go.Scatter(x=df["Date"],
                             y=df["Recovered"],
                             mode='lines+markers',
                             name='Recovered cases',
                             line_shape="spline",
                             line=dict(width=2,
                                       color='seagreen',
                                      ),
                             marker=dict(size=10),))

    fig.update_layout(
        title="Recovered cases in Hungary, total: %d" % df["Recovered"].iloc[-1],
        xaxis_title="Date of observation",
        yaxis_title="Number of recovered cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [16]:
recoveredCasesPlot(True)

Combined data

In [17]:
def combinedCasesPlot(ylog=False):
    fig = go.Figure()
    
    
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Deaths"],
        name='Death cases',
        marker=dict(
            color="tomato"),
    ))
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Recovered"],
        name="Recovered cases",
        marker=dict(
            color="chartreuse",
        )
    ))
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Active"],
        name='Active cases',
        marker=dict(
            color="dodgerblue"),
    ))

    fig.update_layout(
        title="Current situation in Hungary",
        barmode="stack",
        xaxis_title="Date of observation",
        yaxis_title="Number of cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [18]:
combinedCasesPlot()
In [19]:
def dailyCombinedCasesPlot(ylog=False):
    fig = go.Figure()
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Deaths"].diff(),
        name='Daily death cases',
        marker=dict(
            color="tomato"),
    ))
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["Recovered"].diff(),
        name="Daily recovered cases",
        marker=dict(
            color="chartreuse",
        )
    ))
    
    fig.add_trace(go.Bar(
        x=df["Date"],
        y=df["New"],
        name='Daily new cases',
        marker=dict(
            color="dodgerblue"),
    ))

    fig.update_layout(
        title="Current situation in Hungary",
        barmode="stack",
        xaxis_title="Date of observation",
        yaxis_title="Number of daily new cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        )
    )
    
    if ylog:
        fig.update_layout(
            yaxis_type="log",
        )
        
    fig.show()
In [20]:
dailyCombinedCasesPlot()
In [21]:
fig = go.Figure()


fig.add_trace(go.Scatter(
    x=df["Active"] + df["Deaths"] + df["Recovered"],
    y=df["Deaths"],
    mode='lines+markers',
    name='Recovered cases',
    line_shape="spline",
    line=dict(width=2,
                color='orange',
             ),
    marker=dict(size=10),))


fig.update_layout(
        title="Deaths as a function of registered cases",
        xaxis_title="Number of registered cases",
        yaxis_title="Number of deaths",
        font=dict(
            family="Linux Biolinum",
            size=18,
        ),
        #yaxis_type="log"
)

fig.show()
In [22]:
fig = go.Figure()


fig.add_trace(go.Scatter(
    x=df["Active"] + df["Deaths"] + df["Recovered"],
    y=df["Deaths"].diff() / (df["Active"] + df["Deaths"] + df["Recovered"]).diff(),
    mode='lines+markers',
    name='Recovered cases',
    line=dict(width=2,
                color='orange',
             ),
    marker=dict(size=10),))


fig.update_layout(
        title="Slope of the deaths as a function of registered cases curve",
        xaxis_title="Number of registered cases",
        yaxis_title="Slope",
        font=dict(
            family="Linux Biolinum",
            size=18,
        ),
        #yaxis_type="log"
)

fig.show()

Fitting models

  • In my model I assume that we can see changes in $\beta$ from time to time because of:
    • Introduced policies
    • Events around the world
  • The change is "almost instant" so it happens in 1-2 days
  • Note: Since the virus responsible for COVID-19 is often not producing symptoms till the 14th day, while the person is infectious, there is a delay in the applied policies.
In [23]:
1.08333333333# By index, selected manually
divider_points = [0, 5, 15, 25, 36, 39, 61]

# Extrapolation till this day from 2020-03-04
day_limit = 80

popts = []
pcovs = []
ranges = []
evaluated_fits = []

for i in range(len(divider_points)-1):
    ranges.append(df.index[divider_points[i]:divider_points[i+1]])
    popt, pcov = curve_fit(exponentialFunction,
                           ranges[-1],
                           df["Active"].iloc[divider_points[i]:divider_points[i+1]])
    popts.append(popt)
    pcovs.append(pcov)
    evaluated_fits.append(exponentialFunction(ranges[-1], *popts[-1]))

ranges.append(df.index[divider_points[-1]:])
popt, pcov = curve_fit(exponentialFunction,
                       ranges[-1],
                       df["Active"].iloc[divider_points[-1]:])
popts.append(popt)
pcovs.append(pcov)
evaluated_fits.append(exponentialFunction(ranges[-1], *popts[-1]))

extra_range = range(df.index[-1]+1, day_limit)
extra_y = exponentialFunction(extra_range, *popts[-1])

base = df["Date"].iloc[0]
date_list = [datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=x) for x in range(day_limit)]

fig = go.Figure()



fig.add_trace(go.Scatter(
    x=date_list,
    y=np.hstack((
        np.hstack(evaluated_fits),
        extra_y
    )),
    name="Model",
    line=dict(
        width=2,
    )
))

fig.add_trace(go.Scatter(x=df["Date"],
                         y=df["Active"],
                         mode='markers',
                         name='Data',
                         marker=dict(
                             size=10,
                         )
                        ))

betas = []
for i in popts:
    betas.append("%.2f" % i[0])
    
fig.add_trace(go.Scatter(
    x=df["Date"].iloc[divider_points],
    y=df["Active"].iloc[divider_points],
    text=betas,
    textposition="top center",
    mode="markers+text",
    name=r"New $\beta$ value",
    marker=dict(
        size=15,
        color="rgba(0, 0, 0, 0.4)"
    )
))

for j,i in enumerate(popts[:-1]):
    fig.add_trace(go.Scatter(
        x=date_list[divider_points[j]:],
        y=exponentialFunction(range(divider_points[j], day_limit), *i),
        name=r"%.2f" % i[0],
        line=dict(
            width=1,
            dash="dash"
        )
    ))
    
# Calculate ventilator capacity
out_of_stock = np.floor(np.log(ventilators/popts[-1][1]/ratio_of_ventilator_use)/np.log(popts[-1][0]))
healthy = np.floor(np.log(0.1)/np.log(popts[-1][0]))

extrastr = ""
if popts[-1][0] > 1:
    day = datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=out_of_stock)
    extrastr = ", expected ventilator shortage: %s" % day.strftime("%Y-%m-%d")
else:
    day = datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=healthy)
    extrastr = ", expected end of pandemic: %s" % day.strftime("%Y-%m-%d")


fig.update_layout(
        title="Active cases in Hungary" + extrastr,
        xaxis_title="Date",
        yaxis_title="Number of cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        ),
        yaxis_type="log"
)

fig.show()

Geodata

County dataset

  • Each colum header is the name of the given county
  • Date header is the date of the data
  • The numbers represent the detected cases ### County population data
  • Comes from the Hungarian Wikipedia pages of the given counties
In [24]:
# load geodata
base = df["Date"].iloc[0]
today = (datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=df.index[-1])).strftime("%Y-%m-%d")
yesterday = (datetime.datetime.strptime(base, "%Y-%m-%d") + datetime.timedelta(days=df.index[-2])).strftime("%Y-%m-%d")
geodf = None
for filename in os.listdir(path_to_geojson + "l30-county/"):
    tmp = gpd.read_file(path_to_geojson + "l30-county/" + filename)
    if geodf is None:
        geodf = tmp
    else:
        geodf = pd.concat([geodf, tmp], ignore_index=True)
geodf = geodf.set_index("name")
dfc = pd.read_csv("dataset/county_data.csv")
dfc2 = pd.read_csv("dataset/county_data.csv")
dfc2 = dfc2.set_index("Date")
dfc2 = dfc2.sort_values(today, axis=1, ascending=False)
dfc = dfc.transpose()
new_header = dfc.iloc[0]
dfc = dfc.iloc[1:]
dfc.columns = new_header
geodf = geodf.join(dfc)
geodf["text_coord"] = geodf["geometry"].apply(lambda x: x.representative_point().coords[:])
geodf["text_coord"] = [coords[0] for coords in geodf["text_coord"]]
In [25]:
# load county data

popdf = pd.read_csv("dataset/county_population.csv")
popdf = popdf.transpose()
popdf.columns = ["Population"]
popdf
Out[25]:
Population
Bács-Kiskun megye 524841
Baranya megye 360704
Békés megye 338025
Borsod-Abaúj-Zemplén megye 684793
Budapest 1752286
Csongrád megye 406205
Fejér megye 426120
Győr-Moson-Sopron megye 449967
Hajdú-Bihar megye 539674
Heves megye 307985
Jász-Nagykun-Szolnok megye 386752
Komárom-Eszergom megye 311411
Nógrád megye 201919
Pest megye 1237561
Somogy megye 312084
Szabolcs-Szatmár-Bereg megye 555496
Tolna megye 231183
Vas megye 257688
Veszprém megye 353068
Zala megye 287043
In [26]:
fig, ax = plt.subplots(1, figsize=(15, 15))
ax = geodf.plot(column=today, cmap='Wistia', ax=ax)

for idx, row in geodf.iterrows():
    ax.annotate(s=row[today], xy=row['text_coord'],
                 horizontalalignment='center')

ax.set_title("Cummulative registered cases")
plt.axis("off")
plt.show()
In [27]:
geodf["per1000"] = geodf[today]/popdf["Population"]*1000.

fig, ax = plt.subplots(1, figsize=(15, 15))
ax = geodf.plot(column="per1000", cmap='Wistia', ax=ax)

for idx, row in geodf.iterrows():
    ax.annotate(s="%.2f" % (row[today]/popdf["Population"][row.name]*1000.), xy=row['text_coord'],
                 horizontalalignment='center')

ax.set_title("Cummulative registered cases per 1000 person")
plt.axis("off")
plt.show()
In [28]:
fig, ax = plt.subplots(1, figsize=(15, 15))

geodf["difference"] = geodf[today] - geodf[yesterday]

ax = geodf.plot(column="difference", cmap='Wistia', ax=ax)

for idx, row in geodf.iterrows():
    ax.annotate(s=row["difference"], xy=row['text_coord'],
                 horizontalalignment='center')

ax.set_title("Registered new cases")
plt.axis("off")
plt.show()
In [29]:
fig, ax = plt.subplots(1, figsize=(15, 15))

geodf["differenceper100000"] = geodf["difference"]/popdf["Population"]*100000.

ax = geodf.plot(column="differenceper100000", cmap='Wistia', ax=ax)

for idx, row in geodf.iterrows():
    ax.annotate(s="%.2f" % row["differenceper100000"], xy=row['text_coord'],
                 horizontalalignment='center')

ax.set_title("Registered new cases per 100000 person")
plt.axis("off")
plt.show()
In [30]:
fig = go.Figure()

for i in range(dfc2.shape[1]):
    fig.add_trace(go.Scatter(
        x=dfc2.index,
        y=dfc2[dfc2.columns[i]],
        mode='lines+markers',
        marker=dict(
            symbol=i+1,
            size=10,
        ),
        name=dfc2.columns[i],
    ),)
                             

fig.update_layout(
        title="Registered cases in the different counties",
        xaxis_title="Date",
        yaxis_title="Number of cases",
        font=dict(
            family="Linux Biolinum",
            size=18,
        ),
        yaxis_type="log",
)
fig.show()
In [ ]: